#Althoff IFN data processing
This analysis is to look at why Scribble KO HSC are insensitive to activating signals from interferon. To understand this WT and Scibble KO mice are treated or not with PolyIC an interferon stimulator. There are 4 groups here with WT, KO, WTIC, and KOIC. There are 3 mice with untreated condition and 4 mice per treated condition.
library(ggplot2)
library(DESeq2)
library(tximport)
library(readr)
library(tximportData)
library(readxl)
library(knitr)
library(tidyverse)
library(pheatmap)
library(RColorBrewer)
library(viridis)
library(ggrepel)
library(EnhancedVolcano)
library(fgsea)
library(limma)
library(VennDiagram)
library(UpSetR)
library(wesanderson)
library(kableExtra)
library(reshape)
library(dplyr)
library(msigdbr)
## New names:
## * `` -> ...1
| samples | reads before trim | reads after trim | %Q30 | duplication | % of reads with adapter | Insert size | percent aligned | splices annotated | salmon percent aligned |
|---|---|---|---|---|---|---|---|---|---|
| WT_rep1 | 61002551 | 60449791 | 92.4929 | 12.0915 | 9.561156 | 118 | 78.64 | 9198083 | 59.6994 |
| WT_rep2 | 59366695 | 58759406 | 91.5568 | 11.6178 | 8.725623 | 119 | 79.87 | 10097467 | 62.0906 |
| WT_rep3 | 70194654 | 69436669 | 92.0470 | 12.9095 | 8.320139 | 119 | 79.59 | 11066266 | 62.8137 |
| KO_rep1 | 63782461 | 63126127 | 91.7028 | 12.4218 | 1.109101 | 119 | 72.87 | 8375861 | 51.9990 |
| KO_rep2 | 66128187 | 65388713 | 91.3069 | 13.5671 | 8.849251 | 117 | 77.00 | 8346970 | 56.2829 |
| KO_rep3 | 63880365 | 63168300 | 91.0102 | 13.0646 | 12.360554 | 119 | 71.95 | 8921300 | 56.4861 |
| WTIC_rep1 | 41050763 | 40183636 | 90.1762 | 13.9266 | 3.044598 | 119 | 80.23 | 5500867 | 53.5136 |
| WTIC_rep2 | 39474990 | 38752578 | 91.7329 | 12.6450 | 4.681941 | 119 | 81.57 | 5055002 | 52.9577 |
| WTIC_rep3 | 38524980 | 37699529 | 89.4794 | 14.3038 | 3.207358 | 118 | 79.34 | 5343450 | 55.9974 |
| WTIC_rep4 | 37942449 | 37173849 | 90.6850 | 12.4860 | 3.233808 | 119 | 80.16 | 4726578 | 52.9712 |
| KOIC_rep1 | 41161158 | 40368325 | 91.3712 | 14.3709 | 3.844799 | 117 | 80.44 | 4962629 | 52.4011 |
| KOIC_rep2 | 40345747 | 39535690 | 90.9779 | 13.8305 | 3.955074 | 119 | 80.05 | 5199790 | 53.6240 |
| KOIC_rep3 | 43540976 | 42726703 | 91.7218 | 14.2954 | 3.428689 | 119 | 80.06 | 5860118 | 55.7803 |
| KOIC_rep4 | 41799389 | 40956452 | 90.8947 | 14.8623 | 3.349685 | 119 | 80.46 | 4838976 | 52.2488 |
| Sample | Condition |
|---|---|
| WT_rep1 | WT |
| WT_rep2 | WT |
| WT_rep3 | WT |
| KO_rep1 | KO |
| KO_rep2 | KO |
| KO_rep3 | KO |
| WTIC_rep1 | WTIC |
| WTIC_rep2 | WTIC |
| WTIC_rep3 | WTIC |
| WTIC_rep4 | WTIC |
| KOIC_rep1 | KOIC |
| KOIC_rep2 | KOIC |
| KOIC_rep3 | KOIC |
| KOIC_rep4 | KOIC |
We see high correlation between all samples that were treated regardless of phenotype. In the untreated samples there was less correlation this may be due to the mouse variability.
This doesn’t seem to match the correlation data which indicated that the treated samples were more similar to each other.
## [1] TRUE
## [1] TRUE
## using just counts from tximport
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## [1] 4319
## [1] 1180
## [1] 31
## [1] 4329
kable(significant_genes)
| comparison | sig.genes |
|---|---|
| WTvsWTIC | 4319 |
| WTvKO | 1180 |
| WTICvKOIC | 31 |
| KOvKOIC | 4329 |
DEG_WTvKO<-as.data.frame(res_WTvKO) %>%
filter(padj<.01 & abs(log2FoldChange) >= .5)%>%
rownames_to_column(var="ensembl_gene_id")%>%
left_join(ens2gene,by="ensembl_gene_id")%>%
select(Gene, padj, pvalue, log2FoldChange)%>%
arrange(padj)
DEG_WTvWTIC<-as.data.frame(res_WTvWTIC)%>%
filter(padj<.01 & abs(log2FoldChange) >= .5)%>%
rownames_to_column(var="ensembl_gene_id")%>%
left_join(ens2gene,by="ensembl_gene_id")%>%
select(Gene, padj, pvalue, log2FoldChange)%>%
arrange(padj)
DEG_WTICvKOIC<-as.data.frame(res_WTICvKOIC)%>%
filter(padj<.01 & abs(log2FoldChange) >= .5)%>%
rownames_to_column(var="ensembl_gene_id")%>%
left_join(ens2gene,by="ensembl_gene_id")%>%
select(Gene, padj, pvalue, log2FoldChange)%>%
arrange(padj)
DEG_KOvKOIC<-as.data.frame(res_KOvKOIC)%>%
filter(padj<.01 & abs(log2FoldChange) >= .5)%>%
rownames_to_column(var="ensembl_gene_id")%>%
left_join(ens2gene,by="ensembl_gene_id")%>%
select(Gene, padj, pvalue, log2FoldChange)%>%
arrange(padj)
plotDispEsts(dds.filtered)
# Volcano Plots
## Warning: Removed 6293 rows containing missing values (geom_point).
## Warning: Removed 4220 rows containing missing values (geom_point).
## Warning: Removed 4220 rows containing missing values (geom_point).
## Warning: Removed 4770 rows containing missing values (geom_point).
## Warning: Unknown or uninitialised column: `Mouse_genotype`.
top_20_WTvKO<-DEG_WTvKO[1:20, 1]
top_20_WTvWTIC<-DEG_WTvWTIC[1:20, 1]
top_20_KOvKOIC<-DEG_KOvKOIC[1:20, 1]
top_20_WTICvKOIC<-DEG_WTvWTIC[1:20, 1]
normalized_counts <- counts(dds.filtered, normalized=T)
normalized_counts<-as.data.frame(normalized_counts)
normalized_counts<-rownames_to_column(normalized_counts, var="ensembl_gene_id")
normalized_counts<-inner_join(normalized_counts, ens2gene, by="ensembl_gene_id")
top20_norm_counts_WTvKO <- filter(normalized_counts, Gene %in% top_20_WTvKO)%>%
select(Gene, WT_rep1, WT_rep2, WT_rep3, KO_rep1, KO_rep2, KO_rep3)
## stopped here there is something wrong
melted_top20_norm_counts_WTvKO <- data.frame(melt(top20_norm_counts_WTvKO))
## Using Gene as id variables
colnames(melted_top20_norm_counts_WTvKO)<-c("Gene", "Sample", "normalized_counts")
melted_top20_norm_counts_WTvKO<-full_join(melted_top20_norm_counts_WTvKO,metadata, by="Sample")
melted_top20_norm_counts_WTvKO<-filter(melted_top20_norm_counts_WTvKO, Condition!=c("WTIC", "KOIC"))
ggplot(melted_top20_norm_counts_WTvKO) +
geom_point(aes(x = Gene, y = normalized_counts, color = Condition)) +
scale_y_log10() +
xlab("Genes") +
ylab("Normalized Counts") +
ggtitle("Top 20 Significant DE Genes") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(plot.title=element_text(hjust=0.5))
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 4 rows containing missing values (geom_point).
kable(top20_norm_counts_WTvKO)
| Gene | WT_rep1 | WT_rep2 | WT_rep3 | KO_rep1 | KO_rep2 | KO_rep3 |
|---|---|---|---|---|---|---|
| Ldhc | 0.00000 | 0.0000 | 0.0000 | 0.000000 | 0.000000 | 106.04029 |
| Ttc16 | 0.00000 | 0.0000 | 167.7153 | 0.000000 | 0.000000 | 0.00000 |
| Gm15429 | 0.00000 | 0.0000 | 0.0000 | 0.000000 | 427.015366 | 0.00000 |
| Olfr248 | 0.00000 | 0.0000 | 0.0000 | 209.025545 | 0.000000 | 0.00000 |
| Cyp4f39 | 0.00000 | 0.0000 | 0.0000 | 126.581207 | 0.000000 | 0.00000 |
| Gm10221 | 0.00000 | 0.0000 | 0.0000 | 0.000000 | 0.000000 | 139.93841 |
| Gm3086 | 0.00000 | 0.0000 | 0.0000 | 0.000000 | 0.000000 | 89.52581 |
| Gm17022 | 86.44889 | 0.0000 | 0.0000 | 0.000000 | 0.000000 | 0.00000 |
| Gm26571 | 0.00000 | 0.0000 | 0.0000 | 0.000000 | 234.898812 | 0.00000 |
| H2-Pa | 0.00000 | 0.0000 | 0.0000 | 85.775423 | 0.000000 | 0.00000 |
| Gm37571 | 0.00000 | 0.0000 | 0.0000 | 0.000000 | 89.600578 | 0.00000 |
| Gm38289 | 0.00000 | 0.0000 | 0.0000 | 0.000000 | 269.608946 | 0.00000 |
| Gm10728 | 0.00000 | 0.0000 | 0.0000 | 83.277110 | 0.000000 | 0.00000 |
| Gm19148 | 0.00000 | 0.0000 | 0.0000 | 0.000000 | 0.000000 | 217.29567 |
| Gm43890 | 0.00000 | 0.0000 | 0.0000 | 0.000000 | 104.130401 | 0.00000 |
| Gm45399 | 130.90833 | 142.9232 | 343.3146 | 5.829398 | 5.650487 | 0.00000 |
| Gm49369 | 0.00000 | 0.0000 | 0.0000 | 213.189401 | 0.000000 | 0.00000 |
| Gm45632 | 0.00000 | 0.0000 | 0.0000 | 94.103134 | 0.000000 | 0.00000 |
| Gm47502 | 0.00000 | 0.0000 | 0.0000 | 0.000000 | 164.671332 | 0.00000 |
| Gm49522 | 0.00000 | 0.0000 | 0.0000 | 0.000000 | 0.000000 | 95.61009 |
ens2gene <- unique(ens2gene)
#pulls the hallmark gene set for mouse
h_gene_sets_mouse = msigdbr(species = "mouse", category = "H")
mouse.hallmark.list = base::split(x = h_gene_sets_mouse$gene_symbol, f = h_gene_sets_mouse$gs_name)
#tidying data
WTvKO_gsea<-results(dds.filtered, contrast=c("Condition", "WT", "KO"),tidy=TRUE)
colnames(WTvKO_gsea)[1]<-"ensembl_gene_id"
WTvKO_gsea<-inner_join(WTvKO_gsea, ens2gene, by="ensembl_gene_id")
WTvKO_gsea_sum<-WTvKO_gsea%>%
dplyr::select(Gene, stat) %>%
na.omit() %>%
distinct() %>%
group_by(Gene) %>%
summarise(stat=mean(stat))
rank_posWTvKO<-deframe(WTvKO_gsea_sum)
fgsea_posWTvKO_hallmark<- fgsea(pathways=mouse.hallmark.list, stats=rank_posWTvKO)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.29% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fgseaRes_posWTvKO_tidy <-fgsea_posWTvKO_hallmark%>%
as_tibble() %>%
arrange(desc(NES))
WTvWTIC_gsea<-results(dds.filtered, contrast=c("Condition", "WT", "WTIC"), tidy=TRUE)
colnames(WTvWTIC_gsea)[1]<-"ensembl_gene_id"
WTvWTIC_gsea<-inner_join(WTvWTIC_gsea, ens2gene, by="ensembl_gene_id")
WTvWTIC_gsea_sum<-WTvWTIC_gsea%>%
dplyr::select(Gene, stat) %>%
na.omit() %>%
distinct() %>%
group_by(Gene) %>%
summarise(stat=mean(stat))
rank_posWTvWTIC<-deframe(WTvWTIC_gsea_sum)
fgsea_posWTvWTIC_hallmark<- fgsea(pathways=mouse.hallmark.list, stats=rank_posWTvWTIC)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.18% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
fgseaRes_posWTvWTIC_tidy <-fgsea_posWTvWTIC_hallmark%>%
as_tibble() %>%
arrange(desc(NES))
KOvKOIC_gsea<-results(dds.filtered, contrast=c("Condition","KO", "KOIC"),tidy=TRUE)
colnames(KOvKOIC_gsea)[1]<-"ensembl_gene_id"
KOvKOIC_gsea<-inner_join(KOvKOIC_gsea, ens2gene, by="ensembl_gene_id")
KOvKOIC_gsea_sum<-KOvKOIC_gsea%>%
dplyr::select(Gene, stat) %>%
na.omit() %>%
distinct() %>%
group_by(Gene) %>%
summarise(stat=mean(stat))
rank_posKOvKOIC<-deframe(KOvKOIC_gsea_sum)
fgsea_posKOvKOIC_hallmark<- fgsea(pathways=mouse.hallmark.list, stats=rank_posKOvKOIC)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
fgseaRes_posKOvKOIC_tidy <-fgsea_posKOvKOIC_hallmark%>%
as_tibble() %>%
arrange(desc(NES))
WTICvKOIC_gsea<-results(dds.filtered, contrast=c("Condition", "WTIC", "KOIC"),tidy=TRUE)
colnames(WTICvKOIC_gsea)[1]<-"ensembl_gene_id"
WTICvKOIC_gsea<-inner_join(WTICvKOIC_gsea, ens2gene, by="ensembl_gene_id")
WTICvKOIC_gsea_sum<-WTICvKOIC_gsea%>%
dplyr::select(Gene, stat) %>%
na.omit() %>%
distinct() %>%
group_by(Gene) %>%
summarise(stat=mean(stat))
rank_posWTICvKOIC<-deframe(WTICvKOIC_gsea_sum)
fgsea_posWTICvKOIC_hallmark<- fgsea(pathways=mouse.hallmark.list, stats=rank_posWTICvKOIC)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.24% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
fgseaRes_posWTICvKOIC_tidy <-fgsea_posWTICvKOIC_hallmark%>%
as_tibble() %>%
arrange(desc(NES))